import pandas as pd
import numpy as np
import plotly.express as px
from IPython.core.display import display, HTML
# Load the listings CSV from the working directory.
imported_df = pd.read_csv('listings.csv')
# Work on a copy so that `imported_df` keeps the data exactly as it was read.
df = imported_df.copy()
# Print column names, non-null counts and dtypes.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 16055 entries, 0 to 16054 Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 id 16055 non-null int64 1 name 16045 non-null object 2 host_id 16055 non-null int64 3 host_name 16014 non-null object 4 neighbourhood_group 16055 non-null object 5 neighbourhood 16055 non-null object 6 latitude 16055 non-null float64 7 longitude 16055 non-null float64 8 room_type 16055 non-null object 9 price 16055 non-null int64 10 minimum_nights 16055 non-null int64 11 number_of_reviews 16055 non-null int64 12 last_review 11772 non-null object 13 reviews_per_month 11772 non-null float64 14 calculated_host_listings_count 16055 non-null int64 15 availability_365 16055 non-null int64 16 number_of_reviews_ltm 16055 non-null int64 17 license 10277 non-null object dtypes: float64(3), int64(8), object(7) memory usage: 2.2+ MB
Show number of missing values:
# Count the missing (NaN) values in each column.
df.isna().sum()
id 0 name 10 host_id 0 host_name 41 neighbourhood_group 0 neighbourhood 0 latitude 0 longitude 0 room_type 0 price 0 minimum_nights 0 number_of_reviews 0 last_review 4283 reviews_per_month 4283 calculated_host_listings_count 0 availability_365 0 number_of_reviews_ltm 0 license 5778 dtype: int64
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()
| | id | host_id | latitude | longitude | price | minimum_nights | number_of_reviews | reviews_per_month | calculated_host_listings_count | availability_365 | number_of_reviews_ltm |
|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 1.605500e+04 | 1.605500e+04 | 16055.000000 | 16055.000000 | 16055.000000 | 16055.000000 | 16055.000000 | 11772.000000 | 16055.000000 | 16055.000000 | 16055.000000 |
| mean | 2.822959e+07 | 1.173823e+08 | 41.391995 | 2.167132 | 112.467331 | 12.970103 | 34.153348 | 1.164569 | 17.319028 | 164.437745 | 3.762940 |
| std | 1.594047e+07 | 1.232569e+08 | 0.014416 | 0.017599 | 325.455790 | 31.696885 | 67.383300 | 1.439324 | 33.352697 | 136.654295 | 8.811417 |
| min | 1.867400e+04 | 3.073000e+03 | 41.333420 | 2.091590 | 0.000000 | 1.000000 | 0.000000 | 0.010000 | 1.000000 | 0.000000 | 0.000000 |
| 25% | 1.504661e+07 | 8.813134e+06 | 41.380800 | 2.157060 | 40.000000 | 1.000000 | 0.000000 | 0.180000 | 1.000000 | 8.000000 | 0.000000 |
| 50% | 2.956314e+07 | 5.913604e+07 | 41.389450 | 2.168150 | 70.000000 | 3.000000 | 5.000000 | 0.700000 | 3.000000 | 153.000000 | 0.000000 |
| 75% | 4.231742e+07 | 2.131051e+08 | 41.401520 | 2.177390 | 125.000000 | 30.000000 | 35.000000 | 1.670000 | 16.000000 | 306.000000 | 4.000000 |
| max | 5.270268e+07 | 4.257934e+08 | 41.461930 | 2.229670 | 9999.000000 | 1124.000000 | 862.000000 | 27.000000 | 182.000000 | 365.000000 | 283.000000 |
# Preview the first rows of the listings table.
df.head()
| | id | name | host_id | host_name | neighbourhood_group | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | number_of_reviews_ltm | license |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 18674 | Huge flat for 8 people close to Sagrada Familia | 71615 | Mireia And Maria | Eixample | la Sagrada Família | 41.40420 | 2.17306 | Entire home/apt | 121 | 1 | 21 | 2019-10-11 | 0.23 | 19 | 47 | 0 | HUTB-002062 |
| 1 | 23197 | Forum CCIB DeLuxe★Spacious &Elegant★Large Balcony | 90417 | Etain (Marnie) | Sant Martí | el Besòs i el Maresme | 41.41291 | 2.22063 | Entire home/apt | 220 | 4 | 52 | 2019-12-15 | 0.74 | 2 | 86 | 0 | HUTB-005057 |
| 2 | 32711 | Sagrada Familia area - Còrsega 1 | 135703 | Nick | Gràcia | el Camp d'en Grassot i Gràcia Nova | 41.40566 | 2.17015 | Entire home/apt | 144 | 2 | 63 | 2019-09-06 | 0.60 | 3 | 85 | 0 | HUTB-001722 |
| 3 | 34981 | VIDRE HOME PLAZA REAL on LAS RAMBLAS | 73163 | Andres | Ciutat Vella | el Barri Gòtic | 41.37978 | 2.17623 | Entire home/apt | 181 | 4 | 156 | 2020-03-11 | 1.55 | 2 | 136 | 7 | HUTB-001506 |
| 4 | 35379 | Double 04 CasanovaRooms Barcelona | 152232 | Pablo | Eixample | l'Antiga Esquerra de l'Eixample | 41.39036 | 2.15274 | Private room | 41 | 2 | 358 | 2021-07-21 | 4.01 | 4 | 193 | 41 | Exempt |
import plotly.express as px
# Map of every listing, drawn as a blue point at its latitude/longitude.
fig = px.scatter_mapbox(
    df,
    lat="latitude",
    lon="longitude",
    color_discrete_sequence=["blue"],
    zoom=12.5,
    height=800,
    opacity=1,
    title='Airbnb properties in Barcelona',
)
# Use the OpenStreetMap base layer and drop every margin except a 30-unit
# strip at the top.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 30, "l": 0, "b": 0},
)
fig.show()
# Same map of all listings, with the points coloured by their room type.
fig = px.scatter_mapbox(
    df,
    lat="latitude",
    lon="longitude",
    color='room_type',
    zoom=12.5,
    height=800,
    opacity=1,
    title='Airbnb properties in Barcelona',
)
# Use the OpenStreetMap base layer and drop every margin except a 30-unit
# strip at the top.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 30, "l": 0, "b": 0},
)
fig.show()
# Number of most-listed neighbourhoods to break down by room type.
TOP_DISTRICT_COUNT = 5

# value_counts() orders neighbourhoods by number of listings (descending),
# so the head of its index holds the neighbourhoods with the most listings.
top_districts = df.neighbourhood.value_counts().index[:TOP_DISTRICT_COUNT]

# One record per (district, room type) pair:
#   type     -> room type label
#   eff      -> number of listings of that room type in the district
#   district -> neighbourhood name
# Districts keep the ranking order above; within a district the room types
# are ordered by their own count (descending), as returned by value_counts().
records = []
for district in top_districts:
    room_type_counts = df.loc[df.neighbourhood == district, 'room_type'].value_counts()
    for room_type, count in room_type_counts.items():
        records.append({'type': room_type, 'eff': count, 'district': district})

# Building from records (instead of transposing a list of lists) keeps 'eff'
# as an integer column; passing `columns` also yields a well-formed, empty
# frame when there are no listings at all.
df_top_district = pd.DataFrame(records, columns=['type', 'eff', 'district'])
df_top_district
| | type | eff | district |
|---|---|---|---|
| 0 | Entire home/apt | 1285 | la Dreta de l'Eixample |
| 1 | Private room | 599 | la Dreta de l'Eixample |
| 2 | Hotel room | 70 | la Dreta de l'Eixample |
| 3 | Shared room | 41 | la Dreta de l'Eixample |
| 4 | Private room | 653 | el Raval |
| 5 | Entire home/apt | 569 | el Raval |
| 6 | Hotel room | 9 | el Raval |
| 7 | Shared room | 8 | el Raval |
| 8 | Entire home/apt | 551 | el Barri Gòtic |
| 9 | Private room | 529 | el Barri Gòtic |
| 10 | Shared room | 13 | el Barri Gòtic |
| 11 | Hotel room | 1 | el Barri Gòtic |
| 12 | Entire home/apt | 534 | Sant Pere, Santa Caterina i la Ribera |
| 13 | Private room | 405 | Sant Pere, Santa Caterina i la Ribera |
| 14 | Hotel room | 18 | Sant Pere, Santa Caterina i la Ribera |
| 15 | Shared room | 11 | Sant Pere, Santa Caterina i la Ribera |
| 16 | Entire home/apt | 631 | la Sagrada Família |
| 17 | Private room | 323 | la Sagrada Família |
| 18 | Hotel room | 5 | la Sagrada Família |
| 19 | Shared room | 4 | la Sagrada Família |
# Sunburst of listing counts: the hierarchy goes district -> room type and
# each sector is sized by the 'eff' count.
fig = px.sunburst(df_top_district, path=['district', 'type'], values='eff')
# Tight margins (only a top strip is kept) and the chart title, set in one call.
# See https://plot.ly/python/creating-and-updating-figures/
fig.update_layout(
    title_text="Room number per district and room type",
    margin=dict(t=30, l=0, r=0, b=0),
)
fig.show()
# Share (in %) of each room type among all listings of its district.
# Count the listings (non-null ids) of every neighbourhood once, rather than
# re-filtering the whole listings table for each row of df_top_district.
listings_per_district = df.groupby('neighbourhood')['id'].count()
district_totals = df_top_district['district'].map(listings_per_district)
# 'eff' may be held as generic Python objects; coerce it to a numeric dtype so
# the division always produces a float column.
df_top_district['percent'] = 100 * pd.to_numeric(df_top_district['eff']) / district_totals
df_top_district
| | type | eff | district | percent |
|---|---|---|---|---|
| 0 | Entire home/apt | 1285 | la Dreta de l'Eixample | 64.411028 |
| 1 | Private room | 599 | la Dreta de l'Eixample | 30.025063 |
| 2 | Hotel room | 70 | la Dreta de l'Eixample | 3.508772 |
| 3 | Shared room | 41 | la Dreta de l'Eixample | 2.055138 |
| 4 | Private room | 653 | el Raval | 52.703793 |
| 5 | Entire home/apt | 569 | el Raval | 45.924132 |
| 6 | Hotel room | 9 | el Raval | 0.726392 |
| 7 | Shared room | 8 | el Raval | 0.645682 |
| 8 | Entire home/apt | 551 | el Barri Gòtic | 50.365631 |
| 9 | Private room | 529 | el Barri Gòtic | 48.354662 |
| 10 | Shared room | 13 | el Barri Gòtic | 1.188300 |
| 11 | Hotel room | 1 | el Barri Gòtic | 0.091408 |
| 12 | Entire home/apt | 534 | Sant Pere, Santa Caterina i la Ribera | 55.165289 |
| 13 | Private room | 405 | Sant Pere, Santa Caterina i la Ribera | 41.838843 |
| 14 | Hotel room | 18 | Sant Pere, Santa Caterina i la Ribera | 1.859504 |
| 15 | Shared room | 11 | Sant Pere, Santa Caterina i la Ribera | 1.136364 |
| 16 | Entire home/apt | 631 | la Sagrada Família | 65.524403 |
| 17 | Private room | 323 | la Sagrada Família | 33.541018 |
| 18 | Hotel room | 5 | la Sagrada Família | 0.519211 |
| 19 | Shared room | 4 | la Sagrada Família | 0.415369 |
# Three faceted bar charts built from df_top_district. Each figure gets its
# title and a blank x-axis title right where it is created.

# Listing counts per district, one facet per room type.
fig1 = px.bar(
    df_top_district,
    x='district',
    y='eff',
    facet_col='type',
    color='type',
    width=300 * 5,
).update_layout(
    title_text="Room number per type per district"
).update_xaxes(title_text="")

# Room type percentages, one facet per district.
fig2 = px.bar(
    df_top_district,
    x='type',
    y='percent',
    facet_col='district',
    color='type',
    width=300 * 5,
).update_layout(
    title_text="Room type proportion per district"
).update_xaxes(title_text="")

# Room type percentages per district, one facet per room type.
fig3 = px.bar(
    df_top_district,
    x='district',
    y='percent',
    facet_col='type',
    color='type',
    width=300 * 5,
).update_layout(
    title_text="Room type proportion comparison per district"
).update_xaxes(title_text="")

# Section heading first, then the figures in order.
display(HTML('<h2>Room type analysis the first 5 districts</h2>'))
fig1.show()
fig2.show()
fig3.show()